When you have finished with the notebook, convert it to HTML:
jupyter-nbconvert --execute worksheet-interactive.ipynb
You can find further interactive tools on the pyviz site:
Note:
I had to use the command jupyter-nbconvert --execute worksheet-interactive.ipynb --ExecutePreprocessor.timeout=180 to prevent a timeout caused by the long computations.
I've chosen Chicago city's "Crimes - 2001 to present" dataset.
import os
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
# Locate and load the crime dataset from the data directory.
data_dir = './/data//'
# os.listdir() returns names in arbitrary order, and the data directory also
# holds the district shapefile, so blindly taking the last entry is not
# reliable. Select the (optionally compressed) CSV files explicitly and sort
# them so the same file is picked on every machine.
csv_suffixes = ('.csv', '.csv.gz', '.csv.bz2', '.csv.zip', '.csv.xz')
csv_files = sorted(name for name in os.listdir(data_dir)
                   if name.lower().endswith(csv_suffixes))
if not csv_files:
    raise FileNotFoundError('No CSV file found in ' + data_dir)
data_file = data_dir + csv_files[-1]
crime_data = pd.read_csv(data_file)
crime_data.head()
# Pull the two coordinate columns out of the crime table
X = crime_data['Longitude']
Y = crime_data['Latitude']

# Keep only the entries that actually hold a coordinate value
X = X[np.logical_not(np.isnan(X))]
Y = Y[np.logical_not(np.isnan(Y))]

# Report how many rows lack a longitude, in absolute and relative terms
n_total = len(crime_data)
n_missing = n_total - len(X)
missing_pct = (1 - len(X)/n_total)*100
print('There are {0}/{1} entries ({2:.3f}% of all entries) without coordinates.'.format(
    n_missing, n_total, missing_pct))
geoviews, datashader and geopandas
from functools import partial
import colorcet as cc
import datashader as ds
import datashader.transfer_functions as tf
from datashader.colors import colormap_select
import geoviews as gv
from geoviews import opts
import geopandas as gpd
import cartopy.crs as ccrs
from holoviews.operation.datashader import datashade
# Use bokeh as the plotting backend for geoviews
gv.extension('bokeh')

# Wrapper around datashader's colormap_select (no arguments are pre-bound)
cmap = partial(colormap_select)

# Scale factor for the square canvas; the base side length is 600 px
fs = 1
canvas_side = fs*600
cvs = ds.Canvas(plot_width=canvas_side, plot_height=canvas_side)

# Aggregate every crime location onto the canvas grid
agg = cvs.points(crime_data, 'Longitude', 'Latitude')

# Turn off the border of rendered datashader images
tf.Image.border = 0

# Shade the aggregate with the 'fire' colormap on a log scale and put it
# on a black background
shaded = tf.shade(agg, cmap=cmap(cc.fire, 0.2), how='log')
img = tf.set_background(shaded, 'black')
img
The plot indicates that there is a faulty row (or several rows) in the dataset where the coordinates are incorrect. These entries can easily be filtered out.
# Filter out the entries with faulty coordinates. The surviving coordinates
# are stored in a new frame; crime_data itself is left untouched.
# The cutoff latitude was acquired by plotting the data on a temporary
# scatter plot.
cutoff_lat = 41.6
crime_coords = crime_data.loc[crime_data['Latitude'] > cutoff_lat,
                              ['Latitude', 'Longitude']]

# Bounding box of the remaining points
x_min = crime_coords['Longitude'].min()
x_max = crime_coords['Longitude'].max()
y_min = crime_coords['Latitude'].min()
y_max = crime_coords['Latitude'].max()

# Width-to-height ratio of the bounding box, used later to give the
# figure the correct proportions
x_per_y = (x_max - x_min) / (y_max - y_min)
Visualization using geoviews, datashader and geopandas
# Read the district boundary polygons from the shapefile
districts = gpd.read_file('./data/chicago_districts.shp', encoding='utf8')

# Scale the stored area down by a factor of 10^6 to obtain km^2.
# NOTE(review): this assumes 'shape_area' is given in m^2 -- confirm the
# unit against the shapefile's documentation.
districts['area_km2'] = districts['shape_area'].div(1000).div(1000)
districts.head()
# Wrap the districts as Polygons and the crime locations as Points
polys = gv.Polygons(data=districts, vdims=['community', 'area_km2'], crs=ccrs.PlateCarree())
points = gv.Points(data=crime_coords, kdims=['Longitude', 'Latitude'], crs=ccrs.PlateCarree())

# Build the three layers of the map: dark base tiles, the datashaded
# crime locations, and faint district outlines with a hover tool
base_tiles = gv.tile_sources.CartoDark()
crime_density = datashade(points, expand=False, height=2000, width=2000,
                          cmap=cc.fire, normalization='eq_hist')
district_layer = polys.opts(alpha=0.1, color='white', tools=['hover'])

# Overlay the layers and size the figure using the bounding-box ratio
plot = base_tiles * crime_density * district_layer
plot.opts(width=int(1000*x_per_y), height=1000, bgcolor='black')